# script for video on goodness of fit

#  First look at a test of a single proportion.
#  This is from Topic 17.
#
#  hypoth_test_prop() is not defined in this script; it is presumably
#  defined in the file sourced below.
#  NOTE(review): the arguments look like a null proportion of 0.1,
#  13 observed successes in a sample of size 225, and a 0.02 level of
#  significance.  The meaning of the fourth argument (0) cannot be
#  told from here -- confirm against ../hypo_prop.R.
source("../hypo_prop.R")
hypoth_test_prop( 0.1, 13, 225, 0, 0.02)

#  For goodness of fit we want the proportions for all
#  of the possible outcomes.  These are the null
#  hypothesis proportions:
#
null_props <- c(
  0.15, 0.09, 0.1, 0.15,
  0.135, 0.08, 0.14, 0.155
)
#  The null proportions cover every possible outcome, so they must
#  add up to 1.  Compare with a tolerance (not ==) because these are
#  floating-point values, and stop here rather than carry a mistyped
#  proportion through the rest of the calculation.
stopifnot(isTRUE(all.equal(sum(null_props), 1)))

#  Then, knowing that we will take (or have already taken) a sample
#  of size 225, find the expected count for each outcome.
sample_size <- 225
expected <- null_props * sample_size
expected

#  Generate the sample data for this example.
#  NOTE(review): gnrnd5() is presumably defined in the sourced file and
#  appears to leave the generated values in the global variable L1,
#  since L1 is used right after the call without being assigned
#  anywhere in this script -- confirm against ../gnrnd5.R.
source("../gnrnd5.R")
gnrnd5(95632022407,985785588)
#  Show the raw values, then count how many times each value occurs.
L1
table(L1)
#  So here are the observed values, one count per outcome.
#  (These are typed in by hand; they should match the counts
#  reported by table(L1) -- confirm if the data ever change.)
observed <- c(27, 19, 14, 39, 30, 18, 39, 39)
observed
#  R silently recycles the shorter vector in arithmetic, so a missing
#  or extra count would give wrong answers without any error.  Make
#  sure there is exactly one observed count per expected count.
stopifnot(length(observed) == length(expected))
#  Then we want to find the   observed - expected   values.
#  (Note: this variable shares its name with the base R function
#  diff(); a call such as diff(x) still reaches the function.)
diff <- observed - expected
diff
#  And we move on from there to get the squares of those
#  differences.
diff_sqr <- diff^2
diff_sqr
#  That magnified the values that were big differences and
#  it made everything positive.  Now divide each of those
#  by the respective  "expected"  value so that the same
#  difference from a larger expected value carries less
#  weight than does a similar difference from a lower expected
#  value.
quotients <- diff_sqr / expected
quotients
#
#  Now to find the overall "strangeness" of our observed
#  values from the expected values we get the sum of
#  all of those quotients.
how_strange <- sum(quotients)
how_strange
#
#  Even if our true population had exactly the proportions
#  given in the null hypothesis we would not expect a sample
#  of 225 items to have those same proportions.  Each such
#  sample would have differences between the observed values
#  and the expected values.  Each such sample would therefore
#  have a value for  "how_strange".  The distribution of
#  those "how_strange" values will be a chi-squared distribution
#  with the degrees of freedom equal to one less than the
#  number of different outcomes.  We have 8 possible outcomes
#  so there are 7 degrees of freedom.
deg_free <- length(observed) - 1
#  Therefore, we can say, if the null hypothesis is true then how
#  strange is it to get a "how_strange" value (about 7.733185 here)
#  or higher?  Use the computed statistic itself, not a hand-copied
#  rounded value, so the answer always matches the data above.
pchisq(how_strange, df = deg_free, lower.tail = FALSE)
#  That is not strange at all.

#  Or we could find the critical value for the same degrees of
#  freedom and for a level of significance of 0.02.
qchisq(0.02, df = deg_free, lower.tail = FALSE)

###  We can do all of this in one step with the goodfit()
###  function.
###  NOTE(review): goodfit() is presumably defined in the sourced
###  file; the arguments look like the outcome labels (1 through 8),
###  the null proportions, the observed counts, and the 0.02 level
###  of significance -- confirm against ../goodfit.R.
source( "../goodfit.R")
goodfit( 1:8, null_props, observed, 0.02)